from IPython.display import HTML
# Render a Bootstrap-styled button whose click handler calls a JavaScript
# function named `codeToggle()`.
# NOTE(review): `codeToggle` is not defined anywhere in this file — confirm it
# is injected elsewhere, otherwise the button does nothing.
HTML('''<button type="button" class="btn btn-outline-danger" onclick="codeToggle();">Toggle Code</button>''')
The unexpected onset of the deadly COVID-19 pandemic threw the entire world into chaos as the virus rapidly spread to every corner of the globe. Vigilant and proactive countries were able to slow its growth and limit the damage done, while countries with incompetent governments and inadequate measures suffered miserably. The economic status of a country is one of the best indicators of how it would react to a crisis. Quality-of-life measures such as frequent sanitization, social distancing, and distribution of masks are effective ways to at least slow the spread of a virus, but they cannot bar it entirely. As this study will show, there are many outliers to these expectations: economically well-off countries buckled under the virus because they couldn’t enforce strong enough measures quickly enough, or because the people didn’t trust their governments enough to follow protocol, while small, underdeveloped countries with poor living conditions and a population unfamiliar with pandemics managed to stifle the growth of the virus with appropriate measures. In the end, the only hopeful solution to the pandemic was the rollout of effective vaccines.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly
import random
%matplotlib inline
plotly.offline.init_notebook_mode()
import warnings
warnings.filterwarnings('ignore')
# Animated world map: one bubble per country and date, sized by the
# cumulative case count and coloured by continent.
df = pd.read_csv("./data/owid-covid-data.csv")

# Rows missing any of these fields cannot be placed on the map or animated.
required_columns = [
    "iso_code",
    "location",
    "continent",
    "date",
    "new_cases_smoothed",
    "total_cases",
]
# Sorting by date makes the animation frames play in chronological order.
covid_df = df.dropna(subset=required_columns).sort_values("date")

fig = px.scatter_geo(
    covid_df,
    locations="iso_code",
    color="continent",
    hover_name="location",
    size="total_cases",
    projection="natural earth",
    animation_frame="date",
    template="plotly_dark",
)
fig.show()
# Fig. 1: GDP per capita against population density for the 2022-04-04
# snapshot, one animation frame per continent, marker size = cases per million.
df_covid = pd.read_csv("./data/owid-covid-data.csv")

fig1_columns = [
    'continent',
    'date',
    'location',
    'total_cases_per_million',
    'total_deaths_per_million',
    'population_density',
    'gdp_per_capita',
]
group_df = df_covid.loc[df_covid["date"] == "2022-04-04", fig1_columns]

# Exploratory checks kept from the notebook (sample row, null counts, sizes).
group_df.head(1)
group_df.isnull().sum(axis=0)
len(group_df)
group_df = group_df.dropna(how='any')
len(group_df)

# The plotted density is the dataset value multiplied by 100.
group_df['population_density'] = group_df['population_density'] * 100

# Hong Kong and Singapore are excluded from the chart.
group_df = group_df[~group_df['location'].isin(['Hong Kong', 'Singapore'])]

fig = px.scatter(
    group_df,
    x="population_density",
    y="gdp_per_capita",
    animation_frame="continent",
    color="location",
    hover_name="location",
    size="total_cases_per_million",
    size_max=25,
    template="plotly_dark",
)
fig.update_yaxes(fixedrange=False, autorange=True)
fig.update_layout(
    showlegend=False,
    title="Fig. 1 - GDP per Capita vs Population density and the Total Cases Per Million Population.",
    xaxis_title="Population density",
    yaxis_title="GDP per Capita",
    template="plotly_dark",
)
# Remove the play/pause buttons; the continent slider stays.
fig["layout"].pop("updatemenus")
fig.show()
Fig. 1 - GDP per Capita vs Population density for all the countries, grouped by their respective continents. The size of each graph marker represents the Total Cases Per Million Population for that particular country.
# Average every numeric indicator per continent.
# `numeric_only=True` is required because `group_df` still carries the string
# columns `date` and `location`; without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
conti_groups = group_df.groupby('continent').mean(numeric_only=True)
conti_groups = conti_groups.reset_index()
list(conti_groups['continent'])
['Africa', 'Asia', 'Europe', 'North America', 'Oceania', 'South America']
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Color sets of paintings; the last two palettes are used by the Fig. 2.2 cell.
night_colors = [
    'rgb(56, 75, 126)', 'rgb(18, 36, 37)', 'rgb(34, 53, 101)',
    'rgb(36, 55, 57)', 'rgb(6, 4, 4)',
]
sunflowers_colors = [
    'rgb(177, 127, 38)', 'rgb(205, 152, 36)', 'rgb(99, 79, 37)',
    'rgb(129, 180, 179)', 'rgb(124, 103, 37)',
]
irises_colors = [
    'rgb(33, 75, 99)', 'rgb(79, 129, 102)', 'rgb(151, 179, 100)',
    'rgb(175, 49, 35)', 'rgb(36, 73, 147)',
]
cafe_colors = [
    'rgb(146, 123, 21)', 'rgb(177, 180, 34)', 'rgb(206, 206, 40)',
    'rgb(175, 51, 21)', 'rgb(35, 36, 21)',
]

# Fig. 2.1: two pies side by side, continent averages of case density (left)
# and death density (right).
specs = [[{'type': 'domain'}, {'type': 'domain'}]]
fig1 = make_subplots(rows=1, cols=2, specs=specs)
labels = list(conti_groups['continent'])

pie_definitions = (
    ('total_cases_per_million', 'Case Density', night_colors),
    ('total_deaths_per_million', 'Death Density', sunflowers_colors),
)
for pie_col, (value_column, pie_name, palette) in enumerate(pie_definitions, start=1):
    fig1.add_trace(
        go.Pie(
            labels=labels,
            values=conti_groups[value_column],
            name=pie_name,
            marker_colors=palette,
        ),
        1,
        pie_col,
    )

# Tune layout and hover info
fig1.update_traces(hoverinfo='label+percent+name')
fig1.update(
    layout_title_text='Fig. 2.1 - Average COVID Cases and Deaths Analysis - Grouped by Continent',
    layout_showlegend=False,
)
fig1.update_layout(
    xaxis_title="Population density",
    yaxis_title="GDP per Capita",
    template="plotly_dark",
)
fig1 = go.Figure(fig1)
fig1.show()
# Fig. 2.2: two pies side by side, continent averages of GDP per capita (left)
# and population density (right).
specs = [[{'type': 'domain'}, {'type': 'domain'}]]
fig2 = make_subplots(rows=1, cols=2, specs=specs)
labels = list(conti_groups['continent'])

# Define pie charts
fig2.add_trace(
    go.Pie(labels=labels, values=conti_groups['gdp_per_capita'],
           name='GDP per Capita', marker_colors=irises_colors),
    1, 1,
)
fig2.add_trace(
    go.Pie(labels=labels, values=conti_groups['population_density'],
           name='Population Density', marker_colors=cafe_colors),
    1, 2,
)

# Tune layout and hover info
fig2.update_traces(hoverinfo='label+percent+name')
# The title is set once; an earlier draft title used to be assigned here and
# was overwritten on the very next line, so it never reached the figure.
fig2.update(
    layout_title_text='Fig. 2.2 - Average GDP per Capita and Population Density - Grouped by Continent',
    layout_showlegend=False,
)
fig2.update_layout(
    xaxis_title="Population density",
    yaxis_title="GDP per Capita",
    template="plotly_dark",
)
fig2 = go.Figure(fig2)
fig2.show()
Starting at the top, with the countries grouped under their respective continents (Fig. 1), we can see in Fig. 2.1 and 2.2 that Europe leads in average case density, taking up 39.3% of the graph, and is a major contributor to death density with 32.3%. Europe also has the highest GDP per capita (30.7%) and a lower population density (13.4%). The low population density essentially assuages the high case and death numbers, but the GDP suggests that these well-faring countries were underprepared for the pandemic. Next in case density is South America (16.2%), which has the highest death density (33.7%), a GDP per capita spanning 12.7% of the graph, and the lowest population density (2.05%). The difference between the case and death density shows a lack of proper medical facilities, which is also reflected by the average GDP, but the low population density can be considered a good measure from a utilitarian point of view. North America is close behind with a case density taking up 15.9% of the graph and a death density taking up 18.1%. Its population density (21.3%) presents a problem when seen with respect to the death density. Similarly, Asia has a low death density (9%), but its high population density (49.8%) negates what should have been a good value. Evidently, these measures aren’t enough to coherently analyze the impact of the virus. The following indices, when put against the case densities, provide a solution.
# Build two views of the policy data:
#   * temp_df   - per-location averages of stringency index and population,
#                 computed over every date that has a stringency value;
#   * policy_df - the 2022-04-04 snapshot (total cases per million per location).
policy_df = df_covid[['location', 'continent', 'date', 'stringency_index', 'total_cases_per_million', 'population']]
# Non-inplace dropna: the frame above is a slice of df_covid, and mutating it
# in place triggers pandas' SettingWithCopyWarning.
policy_df = policy_df.dropna(subset=['location', 'continent'], how='any')

new_policy_df = policy_df.dropna(subset=['stringency_index'], how='any')
new_policy_df.isnull().sum(axis=0)

# numeric_only=True skips the string columns (continent, date); pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
temp_df = new_policy_df.groupby('location').mean(numeric_only=True)
temp_df = temp_df.reset_index()
temp_df = temp_df[['location', 'stringency_index', 'population']]
len(temp_df)

policy_df = policy_df[policy_df.date == '2022-04-04']
len(policy_df)
215
# Display the 2022-04-04 snapshot of policy_df (notebook cell output).
policy_df
| location | continent | date | stringency_index | total_cases_per_million | population | |
|---|---|---|---|---|---|---|
| 770 | Afghanistan | Asia | 2022-04-04 | 16.67 | 4465.799 | 39835428.0 |
| 2322 | Albania | Europe | 2022-04-04 | NaN | 95342.601 | 2872934.0 |
| 3092 | Algeria | Africa | 2022-04-04 | NaN | 5955.045 | 44616626.0 |
| 3856 | Andorra | Europe | 2022-04-04 | NaN | 517413.450 | 77354.0 |
| 4602 | Angola | Africa | 2022-04-04 | NaN | 2923.178 | 33933611.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 172902 | Vietnam | Asia | 2022-04-04 | 63.89 | 100510.978 | 98168829.0 |
| 173435 | Wallis and Futuna | Oceania | 2022-04-04 | NaN | 40923.021 | 11094.0 |
| 174964 | Yemen | Asia | 2022-04-04 | 35.19 | 387.266 | 30490639.0 |
| 175712 | Zambia | Africa | 2022-04-04 | NaN | 16759.989 | 18920657.0 |
| 176458 | Zimbabwe | Africa | 2022-04-04 | NaN | 16340.393 | 15092171.0 |
215 rows × 6 columns
# Restrict both frames to the locations they have in common: the snapshot
# keeps only locations with an average stringency value, and the averages keep
# only locations present in the snapshot.
temp = temp_df['location'].tolist()
has_average = policy_df['location'].isin(temp)
policy_df = policy_df[has_average]

temp2 = policy_df['location'].tolist()
in_snapshot = temp_df['location'].isin(temp2)
final_df = temp_df[in_snapshot]
len(final_df)
182
# Attach the snapshot case density to the per-location averages.
# The values are matched on the `location` key rather than copied over by
# position, so a difference in row order between policy_df and final_df can no
# longer pair a country with another country's case count.
case_den_by_location = (
    policy_df.drop_duplicates(subset='location')
    .set_index('location')['total_cases_per_million']
)
# assign() returns a new frame, so nothing is written into a slice of temp_df.
final_df = final_df.assign(case_den=final_df['location'].map(case_den_by_location))
# drop=True keeps the old index out of the columns; a plain reset_index() adds
# an `index` column on the first run, `level_0` on a re-run, and raises on the
# run after that.
final_df = final_df.reset_index(drop=True)
final_df
| level_0 | index | location | stringency_index | population | case_den | |
|---|---|---|---|---|---|---|
| 0 | 0 | 0 | Afghanistan | 35.542387 | 39835428.0 | 4465.799 |
| 1 | 1 | 1 | Albania | 55.932109 | 2872934.0 | 95342.601 |
| 2 | 2 | 2 | Algeria | 63.866882 | 44616626.0 | 5955.045 |
| 3 | 3 | 3 | Andorra | 46.834267 | 77354.0 | 517413.450 |
| 4 | 4 | 4 | Angola | 64.801156 | 33933611.0 | 2923.178 |
| ... | ... | ... | ... | ... | ... | ... |
| 177 | 177 | 181 | Venezuela | 73.913444 | 28704947.0 | 18146.872 |
| 178 | 178 | 182 | Vietnam | 65.311594 | 98168829.0 | 100510.978 |
| 179 | 179 | 183 | Yemen | 33.621807 | 30490639.0 | 387.266 |
| 180 | 180 | 184 | Zambia | 42.625385 | 18920657.0 | 16759.989 |
| 181 | 181 | 185 | Zimbabwe | 65.993743 | 15092171.0 | 16340.393 |
182 rows × 6 columns
# Fig. 3.1: case density against average stringency index, one marker per
# country, marker area scaled by population.
fig = px.scatter(
    final_df,
    x="stringency_index",
    y="case_den",
    color="location",
    hover_name="location",
    size="population",
    size_max=50,
)
fig_3_1_layout = dict(
    title="Fig. 3.1 - Total Cases per Million Population vs Stringency Index",
    xaxis_title="Stringency Index",
    yaxis_title="Total Cases per Million Population",
    legend_title="COUNTRY",
    template="plotly_dark",
)
fig.update_layout(**fig_3_1_layout)
fig.show()
The stringency index is a composite measure based on nine response indicators including school closures, workplace closures, travel bans, restriction of public events, and stay-at-home policies, rescaled to a value from 0 to 100 (100 being the strictest). It gives us an idea of how strongly the countries enforced COVID prevention measures and how this impacted the mortality rate over time. The average stringency index plotted against case density (Fig. 3.1) gives us a comprehensive measure of how the countries have fared so far, and allows us to identify outliers such as the countries that couldn’t flatten the curve despite enforcing strong measures. The COVID-19 virus originated in China, naturally putting it high on the stringency index axis with an average of 71.54. Most of its neighboring countries like India (68.94), Bhutan (67.31), Bangladesh (71.79), Vietnam (65.31), Pakistan (61.12), Myanmar (75.19), Philippines (70.73), and Kazakhstan (70.72) also exhibited a relatively high stringency index and a low overall case density (<100k), unlike Greece (69.69) and Italy (71.25), which have a high case density (>200k) despite having a high stringency index. Amongst the countries with a high population, China has a case density of just 199, Nigeria 1208, and Pakistan 6775. Nigeria managed to be one of the countries with the lowest case density despite having a stringency index of 53.32. Indonesia has a case density of 21.76k, and India 30.88k; despite being relatively low on the graph, these aren’t good numbers, especially for India with a population of 1.4 billion. Things get worse with Brazil and the United States, which have a case density of 140.26k and 240.84k respectively. The United States had it coming with a stringency index of 57.94, but Brazil seems to be in an unfortunate situation despite having a stringency index of 63.47.
Countries with the highest case density include Denmark (527.92k), Andorra (517.41k), Cyprus (495.73k), Iceland (493.04k), and Slovenia (468.89k); these are devastating numbers considering their low population but expected as their stringency indices are lower than 60, with Iceland being the lowest at a disappointing 39.59. Some less populous countries like Yemen, Niger, and Burkina Faso exhibit impressively low case density despite being on the low end of the stringency index axis.
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Fig. 3.2: fit case density against average stringency index with a
# polynomial of degree 1 (left panel) and degree 3 (right panel).
#
# The degrees are fitted in the order (1, 3), matching the Fig. 4.2 and
# Fig. 5.2 cells, so that `poly_reg_model` / `y_predicted` end up holding the
# cubic fit. The coefficients printed below are the ones hard-coded into the
# combined metric later in the notebook, and those are cubic (three
# coefficients); fitting the linear model last would print a single
# coefficient instead.

# Sort by the predictor so each fitted curve is drawn left to right.
sorted_final_df = final_df.sort_values('stringency_index')
x = np.array(list(sorted_final_df['stringency_index']))
y = np.array(list(sorted_final_df['case_den']))

this_figure = sp.make_subplots(rows=1, cols=2)
for subplot_col, degree in enumerate((1, 3), start=1):
    # include_bias=False: LinearRegression fits its own intercept.
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    poly_features = poly.fit_transform(x.reshape(-1, 1))
    poly_reg_model = LinearRegression()
    poly_reg_model.fit(poly_features, y)
    y_predicted = poly_reg_model.predict(poly_features)

    poly_df = pd.DataFrame(dict(x=x, y=y_predicted))
    curve_fig = px.line(poly_df, x="x", y="y",
                        color_discrete_sequence=px.colors.qualitative.Light24)
    fig = px.scatter(final_df, x="stringency_index", y="case_den",
                     hover_name="location")

    # Break the Express figures down into their traces: data points first,
    # fitted curve on top.
    for trace in (*fig.data, *curve_fig.data):
        this_figure.append_trace(trace, row=1, col=subplot_col)

this_figure.update_layout(
    title="Fig. 3.2 - Curve Fitting in Total Cases per Million Population vs Stringency Index",
    template="plotly_dark",
    showlegend=False,
)
# Previously the figure was configured but never displayed.
this_figure.show()

print(poly_reg_model.coef_)
print(poly_reg_model.intercept_)
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

# Report the fit error (scaled by 1e12) and tabulate the ten countries whose
# case density lies furthest from the most recently fitted curve.
country_index = sorted_final_df['location'].tolist()
print(mean_squared_error(y, y_predicted) / 10**12)
print()

# argpartition moves the ten largest absolute residuals into the last ten
# slots without fully sorting the array.
residuals = abs(y - y_predicted)
ind = np.argpartition(residuals, -10)[-10:]
print()
outliers = [country_index[position] for position in ind]

outlier_table = go.Table(
    header=dict(values=['Outliers']),
    cells=dict(values=[outliers]),
)
fig = go.Figure(data=[outlier_table])
fig.update_layout(
    template="plotly_dark",
    width=500,
    height=229,
    margin=dict(l=0, r=0, t=0, b=0),
)
fig.show()
[-4.45901619e+03 3.62134358e+02 -3.96666481e+00] -22786.46185620365 0.01978532493845641
# Vaccination view: share of the population with at least one dose
# (`total_vacc_den`) and fully vaccinated (`fully_vacc_den`).
vaccination_columns = [
    'location', 'continent', 'date', 'total_cases_per_million',
    'total_vaccinations', 'people_vaccinated', 'people_fully_vaccinated',
    'population',
]
# .copy() makes df_3d an independent frame; adding columns to a bare slice of
# df_covid is what raised the SettingWithCopyWarning.
df_3d = df_covid[vaccination_columns].copy()
df_3d['total_vacc_den'] = df_3d['people_vaccinated'] / df_3d['population']
df_3d['fully_vacc_den'] = df_3d['people_fully_vaccinated'] / df_3d['population']
df_3d = df_3d.dropna(how='any')
len(df_3d)
df_3d['location'].value_counts()

# Columns needed for the Human Development Index analysis below.
new_df = df_covid[['location', 'date', 'human_development_index', 'total_cases_per_million', 'population']]
C:\Users\PARAS_~1\AppData\Local\Temp/ipykernel_34132/4214103964.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\PARAS_~1\AppData\Local\Temp/ipykernel_34132/4214103964.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Sanity check: print the distinct HDI values recorded for India.
# A boolean mask selects the rows in one vectorised step instead of walking
# the whole frame with iterrows().
india_rows = new_df['location'] == 'India'
a = new_df.loc[india_rows, 'human_development_index'].tolist()
print(np.unique(np.array(a)))
[0.645]
# Build hdi_df: one row per country for the 2022-04-04 snapshot, keeping only
# rows with a positive HDI and a positive case density.
#
# This replaces a nested iterrows() loop (every country x every row) and the
# hard-coded `new_df[170000:]` slice that kept that loop tolerable. The slice
# depended on the exact row count of the CSV and would silently drop the
# snapshot rows for a differently sized file.
countries = np.unique(np.array(new_df['location']))
new_df = new_df.sort_values('date')
new_df = new_df.reset_index(drop=True)

snapshot = new_df[new_df['date'] == '2022-04-04']
# Comparisons with NaN are False, so rows missing either value are excluded,
# exactly as the original `> 0` checks did.
has_values = (snapshot['human_development_index'] > 0) & (snapshot['total_cases_per_million'] > 0)
hdi_df = (
    snapshot.loc[has_values, ['location', 'human_development_index', 'total_cases_per_million', 'population']]
    .sort_values('location')  # same alphabetical order as iterating np.unique(countries)
    .rename(columns={'location': 'country'})
    .reset_index(drop=True)
)
# 'World' is an aggregate row, not a country.
hdi_df = hdi_df[hdi_df['country'] != 'World']
len(hdi_df)
188
# Fig. 4.1: case density against Human Development Index, one marker per
# country, marker area scaled by population.
fig = px.scatter(
    hdi_df,
    x="human_development_index",
    y="total_cases_per_million",
    hover_data=['country'],
    color='country',
    size="population",
    size_max=50,
)
fig.update_layout(
    title="Fig. 4.1 - Total Cases per Million Population vs Human Development Index",
    xaxis_title="Human Development Index",
    yaxis_title="Total Cases per Million Population",
    legend_title="COUNTRY",
    template="plotly_dark",
)
# Show the figure explicitly, as the Fig. 3.1 and Fig. 5.1 cells do, instead
# of relying on update_layout() being the last expression of a notebook cell.
fig.show()
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Fig. 4.2: fit case density against HDI with a degree-1 polynomial (left
# panel) and a degree-3 polynomial (right panel). After the loop,
# `poly_reg_model` and `y_predicted` hold the degree-3 fit.

# Sort by the predictor so each fitted curve is drawn left to right.
sorted_hdi_df = hdi_df.sort_values('human_development_index')
x = np.array(sorted_hdi_df['human_development_index'].tolist())
y = np.array(sorted_hdi_df['total_cases_per_million'].tolist())

# Create a 1x2 subplot
this_figure = sp.make_subplots(rows=1, cols=2)
for subplot_col, degree in enumerate((1, 3), start=1):
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    poly_features = poly.fit_transform(x.reshape(-1, 1))
    poly_reg_model = LinearRegression()
    poly_reg_model.fit(poly_features, y)
    y_predicted = poly_reg_model.predict(poly_features)

    poly_df = pd.DataFrame(dict(x=x, y=y_predicted))
    curve_fig = px.line(poly_df, x="x", y="y",
                        color_discrete_sequence=px.colors.qualitative.Light24)
    fig = px.scatter(hdi_df, x="human_development_index",
                     y="total_cases_per_million", hover_name="country")

    # Break the Express figures down into their traces and add them to the
    # matching panel: data points first, fitted curve second.
    for trace in (*fig["data"], *curve_fig["data"]):
        this_figure.append_trace(trace, row=1, col=subplot_col)

this_figure.update_layout(
    title="Fig. 4.2 - Total Cases per Million Population vs Human Development Index",
    template="plotly_dark",
    showlegend=False,
)
this_figure.show()
The Human Development Index (HDI) is a summary measure of average achievement in key dimensions of human development: a long and healthy life, being knowledgeable, and having a decent standard of living. While the stringency index depicts the state of a nation in the face of adversity, the HDI shows its general preparedness and expected response for such a situation. Regions and countries with a high human development index should have an easier time dealing with the virus, but it turns out that they generally have a higher case density (Fig. 4.1). This is due to the international connectedness and mobility of their population related to trade and tourism, the vulnerability of older populations, and higher rates of non-communicable diseases. China has an HDI of 0.761 and responded appropriately to the crisis, which is evident from its high stringency index. On the higher end of the HDI axis, we have countries like Iceland (0.949), Denmark (0.94), Slovenia (0.917), and the United States (0.926) that have a high case density owing to their low stringency index, but also countries like Japan (0.919), Saudi Arabia (0.854), and Kazakhstan (0.825) that have a low case density but vary according to their stringency indices. On the lower end of the HDI axis, we have countries like Niger (0.394), Burkina Faso (0.452), and Yemen (0.47) that have a surprisingly low case density (also shown with respect to the stringency index). In the middle lie the countries of the Indian subcontinent - India (0.645), Bangladesh (0.632), Nepal (0.602), Myanmar (0.583), and Pakistan (0.557) - with their respective case densities more or less fitting the general rising trend. Since these countries are also neighbors of China, they were already vigilant and imposed proper measures. Regardless, the case densities for India and Pakistan present a substantial problem as they have high populations, much higher than the others.
It goes to show that the human development of a country does not have a substantial effect on its ability to deal with a crisis, in this case, a deadly virus that managed to cause great damage because the country wasn’t well equipped to battle it, regardless of how healthy or how literate or how well-off it was before.
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

# Report the fit error (scaled by 1e12) twice, as the two original cells did,
# then the model parameters, and finally tabulate the ten countries whose case
# density lies furthest from the most recently fitted curve.
print(mean_squared_error(y, y_predicted) / 10**12)

country_index = sorted_hdi_df['country'].tolist()
print(mean_squared_error(y, y_predicted) / 10**12)
print()
print(poly_reg_model.coef_)
print(poly_reg_model.intercept_)

# argpartition moves the ten largest absolute residuals into the last ten
# slots without fully sorting the array.
residuals = abs(y - y_predicted)
ind = np.argpartition(residuals, -10)[-10:]
print()
outliers = [country_index[position] for position in ind]

outlier_table = go.Table(
    header=dict(values=['Outliers']),
    cells=dict(values=[outliers]),
)
fig = go.Figure(data=[outlier_table])
fig.update_layout(
    template="plotly_dark",
    width=500,
    height=229,
    margin=dict(l=0, r=0, t=0, b=0),
)
fig.show()
0.008433335326402018 0.008433335326402018 [692040.01958266] -375777.165131785
# Build index_df: the rows of final_df (stringency data) whose location also
# appears in hdi_df, ordered alphabetically by location.
new_countries = list(final_df['location'])
final_df = final_df.sort_values(['location'])
hdi_df = hdi_df.sort_values(['country'])

# Distinct HDI countries, first-seen order preserved.
countries_hdi = list(dict.fromkeys(hdi_df['country']))

# One vectorised filter instead of appending row by row onto an empty slice of
# final_df. The row-wise `.loc` append was quadratic and is what raised the
# SettingWithCopyWarning. reset_index(drop=True) reproduces the 0..n-1 index
# the append loop generated.
index_df = final_df[final_df['location'].isin(countries_hdi)].reset_index(drop=True)
final_countries = list(index_df['location'])
C:\Users\Paras_Gupta\anaconda3\lib\site-packages\pandas\core\indexing.py:723: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Attach each country's HDI to index_df.
# The values are looked up by country name rather than copied over by
# position, so a difference in row order or a duplicate country in hdi_df can
# no longer pair a country with another country's HDI or break the assignment.
hdi_by_country = (
    hdi_df.drop_duplicates(subset='country')
    .set_index('country')['human_development_index']
)
index_df['hdi'] = index_df['location'].map(hdi_by_country)
h = list(index_df['hdi'])
C:\Users\PARAS_~1\AppData\Local\Temp/ipykernel_34132/685853260.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Combined metric: the product of two cubic polynomials, one evaluated on the
# HDI and one on the average stringency index. The coefficients are
# hard-coded below, copied from these notes:
# Stringency
# [-4.45901619e+03 3.62134358e+02 -3.96666481e+00]
# -22786.461856194568
# HDI
# [-4389258.14691102 5801463.49077724 -2002953.0875958 ]
# 987529.7952458942


def _hdi_cubic(value):
    # Cubic in HDI: intercept first, then ascending powers.
    return (987529.7952458942 + (-4389258.14691102)*value
            + (5801463.49077724)*value**2 + (-2002953.0875958)*value**3)


def _stringency_cubic(value):
    # Cubic in stringency index: ascending powers first, intercept last.
    return (value*(-4.45901619e+03) + (3.62134358e+02)*value**2
            + (-3.96666481e+00)*value**3 - 22786.461856194568)


h = list(index_df['hdi'])
s = list(index_df['stringency_index'])
new_metric = [
    _hdi_cubic(hdi_value) * _stringency_cubic(stringency_value)
    for hdi_value, stringency_value in zip(h, s)
]
index_df['new_metric'] = new_metric
C:\Users\PARAS_~1\AppData\Local\Temp/ipykernel_34132/2061131486.py:17: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Exclude Nicaragua from the combined-metric analysis.
# NOTE(review): the notebook does not record why — confirm the reason.
is_nicaragua = index_df['location'] == 'Nicaragua'
index_df = index_df[~is_nicaragua]
index_df
| level_0 | index | location | stringency_index | population | case_den | hdi | new_metric | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | Afghanistan | 35.542387 | 39835428.0 | 4465.799 | 0.511 | -7.609429e+08 |
| 1 | 1 | 1 | Albania | 55.932109 | 2872934.0 | 95342.601 | 0.795 | 2.638407e+10 |
| 2 | 2 | 2 | Algeria | 63.866882 | 44616626.0 | 5955.045 | 0.748 | 1.526227e+10 |
| 3 | 3 | 3 | Andorra | 46.834267 | 77354.0 | 517413.450 | 0.868 | 3.705538e+10 |
| 4 | 4 | 4 | Angola | 64.801156 | 33933611.0 | 2923.178 | 0.581 | 3.748821e+08 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 167 | 177 | 181 | Venezuela | 73.913444 | 28704947.0 | 18146.872 | 0.711 | 1.933863e+09 |
| 168 | 178 | 182 | Vietnam | 65.311594 | 98168829.0 | 100510.978 | 0.704 | 9.287640e+09 |
| 169 | 179 | 183 | Yemen | 33.621807 | 30490639.0 | 387.266 | 0.470 | -1.572671e+08 |
| 170 | 180 | 184 | Zambia | 42.625385 | 18920657.0 | 16759.989 | 0.584 | 5.358326e+08 |
| 171 | 181 | 185 | Zimbabwe | 65.993743 | 15092171.0 | 16340.393 | 0.571 | -1.323258e+07 |
171 rows × 8 columns
# Fig. 5.1: case density against the combined HDI/stringency metric, one
# marker per country, marker area scaled by population.
fig = px.scatter(
    index_df,
    x="new_metric",
    y="case_den",
    hover_data=['location'],
    color='location',
    size="population",
    size_max=50,
)
fig_5_1_layout = dict(
    title="Fig. 5.1 - Total Cases per Million Population vs New Metric (HDI and Stringency Index)",
    xaxis_title="New Metric (HDI and Stringency Index)",
    yaxis_title="Total Cases per Million Population",
    legend_title="COUNTRY",
    template="plotly_dark",
)
fig.update_layout(**fig_5_1_layout)
fig.show()
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Fig. 5.2: fit case density against the combined metric with a degree-1
# polynomial (left panel) and a degree-3 polynomial (right panel). After the
# loop, `poly_reg_model` and `y_predicted` hold the degree-3 fit.

# Sort by the predictor so each fitted curve is drawn left to right.
sorted_index_df = index_df.sort_values('new_metric')
x = np.array(sorted_index_df['new_metric'].tolist())
y = np.array(sorted_index_df['case_den'].tolist())

# Create a 1x2 subplot
this_figure = sp.make_subplots(rows=1, cols=2)
for subplot_col, degree in enumerate((1, 3), start=1):
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    poly_features = poly.fit_transform(x.reshape(-1, 1))
    poly_reg_model = LinearRegression()
    poly_reg_model.fit(poly_features, y)
    y_predicted = poly_reg_model.predict(poly_features)

    poly_df = pd.DataFrame(dict(x=x, y=y_predicted))
    curve_fig = px.line(poly_df, x="x", y="y",
                        color_discrete_sequence=px.colors.qualitative.Light24)
    fig = px.scatter(index_df, x="new_metric", y="case_den",
                     hover_name="location")

    # Break the Express figures down into their traces and add them to the
    # matching panel: data points first, fitted curve second.
    for trace in (*fig["data"], *curve_fig["data"]):
        this_figure.append_trace(trace, row=1, col=subplot_col)

this_figure.update_layout(
    title="Fig. 5.2 - Curve Fitting in Total Cases per Million Population vs New Metric",
    template="plotly_dark",
    showlegend=False,
)
this_figure.show()
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go

# Mean squared error of the most recent fit, scaled down by 10**12 for
# readability. It is printed twice, followed by a blank line.
scaled_mse = mean_squared_error(y, y_predicted) / 10**12
print(scaled_mse)
country_index = list(sorted_index_df['location'])
print(scaled_mse)
print()

# Positions of the ten largest absolute residuals (argpartition does not
# order them among themselves), mapped back to country names.
residuals = np.abs(y - y_predicted)
ind = np.argpartition(residuals, -10)[-10:]
outliers = [country_index[position] for position in ind]

# Render the outlier names as a compact single-column table.
outlier_table = go.Table(
    header=dict(values=['Outliers']),
    cells=dict(values=[outliers]),
)
fig = go.Figure(data=[outlier_table])
fig.update_layout(
    template="plotly_dark",
    width=500,
    height=229,
    margin=dict(l=0, r=0, t=0, b=0),
)
fig.show()
0.006832562280942828 0.006832562280942828
# Countries compared in the cases / deaths / vaccination time series.
countries = ['India', 'Pakistan', 'Nepal', 'Bangladesh', 'United States', 'Denmark', 'France']

df_vacc = df_covid[[
    'location',
    'date',
    'total_cases',
    'people_vaccinated',
    'total_deaths',
    'total_cases_per_million',
    'total_deaths_per_million',
    'people_vaccinated_per_hundred',
    'new_cases_smoothed_per_million',
    'new_deaths_smoothed_per_million',
]]

# Keep only rows where cases, vaccinations and deaths are all positive.
# Comparisons against NaN are False, so rows with missing values are dropped,
# exactly as the former row-by-row check did. Computed once, outside the loop.
has_all_counts = (
    (df_vacc['total_cases'] > 0)
    & (df_vacc['people_vaccinated'] > 0)
    & (df_vacc['total_deaths'] > 0)
)

for country in countries:
    # Vectorised filter instead of iterrows() + .loc row appends: one pass per
    # country, original dtypes preserved, and a fresh 0..n-1 index.
    country_df = df_vacc[has_all_counts & (df_vacc['location'] == country)].reset_index(drop=True)

    # Rescaled series; only the deaths one is plotted below, where the x100
    # factor presumably makes it visible next to the cases curve.
    country_df['people_vaccinated_per_million'] = country_df['people_vaccinated_per_hundred'] * 10000
    country_df['people_vaccinated_per_thousand'] = country_df['people_vaccinated_per_hundred'] * 10
    country_df['new_deaths_smoothed_per_hundred_million'] = country_df['new_deaths_smoothed_per_million'] * 100

    fig = px.line(
        country_df,
        x="date",
        y=[
            'new_cases_smoothed_per_million',
            'new_deaths_smoothed_per_hundred_million',
            'people_vaccinated_per_hundred',
        ],
    )
    fig.update_layout(
        title=f"Fig. 6 -New Cases vs New Deaths vs Total People Vaccinated -- {country}",
        xaxis_title="Date",
        yaxis_title="Number of People",
        template="plotly_dark",
        showlegend=False,
    )
    fig.show()
import plotly.graph_objs as go
df = pd.read_csv("./data/country_vaccinations_by_manufacturer.csv")

# Exploratory look at the available vaccines and locations (the results are
# discarded unless this is the last expression of a notebook cell).
np.unique(np.array(df['vaccine']))
np.unique(np.array(df['location']))

countries = [
    'Argentina', 'Belgium', 'Denmark', 'France', 'Germany', 'Italy', 'Japan',
    'Nepal', 'Norway', 'Peru', 'Portugal', 'South Africa', 'South Korea',
    'Spain', 'Ukraine', 'United States',
]
vaccines = ['Moderna', 'Pfizer/BioNTech', 'Johnson&Johnson', 'Oxford/AstraZeneca']

# Vectorised filter replacing the iterrows() + .loc row-append loop.
new_df = df[df['location'].isin(countries) & df['vaccine'].isin(vaccines)].reset_index(drop=True)

# For every (location, vaccine) pair keep the last row in file order.
# NOTE(review): presumably the most recent date — confirm the CSV is sorted
# chronologically within each location.
latest = new_df.drop_duplicates(subset=['location', 'vaccine'], keep='last')

# vaccine -> [(location, total_vaccinations), ...], listed from the end of the
# file backwards, the same order the previous reverse scan produced.
final_vacs = {vaccine: [] for vaccine in vaccines}
for location, vaccine, total in zip(
    latest['location'][::-1],
    latest['vaccine'][::-1],
    latest['total_vaccinations'][::-1],
):
    final_vacs[vaccine].append((location, total))

# One list per vaccine, aligned with `countries`; 0 where a country has no row
# for that vaccine. The dict lookup also covers a vaccine with no rows at all,
# which previously produced an empty, misaligned series.
vs = []
for vaccine in vaccines:
    totals_by_country = dict(final_vacs[vaccine])
    vs.append([totals_by_country.get(country, 0) for country in countries])
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.graph_objects as px
import plotly.graph_objs as go
# Stacked bars: one trace per manufacturer, countries along the x axis.
# `vs` holds one list of totals per entry of `vaccines`, in the same order.
x = countries
manufacturer_bars = [
    go.Bar(name=vaccine_name, x=x, y=totals)
    for vaccine_name, totals in zip(vaccines, vs)
]
plot = go.Figure(data=manufacturer_bars)

# Stacking mode, size, titles and theme applied in a single layout update.
plot.update_layout(
    barmode='stack',
    height=800,
    title="Fig. 6 - Vaccinations by Manufacturer in different Countries",
    xaxis_title="Country",
    yaxis_title="Vaccinations by Manufacturer",
    legend_title="VACCINES",
    template="plotly_dark",
)
plot.show()
As unfortunate as the pandemic has been for the world, it’d be naive to ignore the flip side of the coin: the boom it has produced for the pharmaceutical industry. With vaccines becoming a necessity for everyone, every capable pharmaceutical company entered the rat race to conquer the market. Whether they were in it for the money or goodwill, they managed to produce results. Pfizer/BioNTech and Moderna, being American companies, produced the bulk of vaccines (> 500M) for the United States. Pfizer was the forerunner, holding a monopoly over the market of almost every first and third world country. They dished out 330M vaccines in the United States alone and around 100M in France, Germany, Italy and Japan. Moderna, although not as predominant as Pfizer, also managed to enter the market of almost every country, rolling out around 30M vaccines each in France, Germany and Italy. In the United States, they went head to head with Pfizer with a total of 210M vaccines sold. While Pfizer and Moderna battled it out for sales, Oxford/AstraZeneca left its mark on European countries and other second and third world countries, notably Argentina and South Korea. Johnson&Johnson tried their hand in the American market but were crushed by Pfizer and Moderna. Their traces also appear in Germany, South Africa, Nepal, etc., but they are too far behind the competition to make any difference. Evidently, popularity and trust were major factors in the sales of vaccines, with established pharmaceutical companies selling more than their counterparts, regardless of effectiveness in some cases.